Part 1¶

Neural style transfer (NST) implementation¶

In [1]:
%matplotlib inline
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.optim as optim
import requests
from torchvision import transforms, models

Load and prepare the pretrained model¶

In [2]:
# Load VGG-19 pretrained on ImageNet; only its convolutional feature
# extractor is used for style transfer.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision;
# the modern spelling is `weights=models.VGG19_Weights.IMAGENET1K_V1`.
vgg = models.vgg19(pretrained=True)

# Change all max-pooling layers to average-pooling layers (recommended by
# Gatys et al. for smoother generated images).
for i, layer in enumerate(vgg.features):
    if isinstance(layer, torch.nn.MaxPool2d):
        vgg.features[i] = torch.nn.AvgPool2d(kernel_size=2, stride=2, padding=0)

# Keep only the convolutional feature stack; the classifier head is unused.
vgg = vgg.features

# Freeze all parameters -- only the target image is optimized, never the net.
for param in vgg.parameters():
    param.requires_grad_(False)
In [3]:
# Select the compute device: prefer GPU when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the frozen feature extractor to the device; as the cell's last
# expression this also displays the module layout in the output.
vgg.to(device)
Out[3]:
Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (17): ReLU(inplace=True)
  (18): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (19): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (20): ReLU(inplace=True)
  (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (22): ReLU(inplace=True)
  (23): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (24): ReLU(inplace=True)
  (25): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (26): ReLU(inplace=True)
  (27): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (29): ReLU(inplace=True)
  (30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (31): ReLU(inplace=True)
  (32): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (33): ReLU(inplace=True)
  (34): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (35): ReLU(inplace=True)
  (36): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
In [4]:
def load_image(img_path, max_size=512, shape=None):
    """Load an image file and convert it to a normalized 4-D tensor.

    Parameters
    ----------
    img_path : str
        Path of the image to open.
    max_size : int
        Cap on the longer side when no explicit ``shape`` is given.
    shape : tuple or None
        Exact size passed to ``transforms.Resize``; overrides ``max_size``.

    Returns
    -------
    torch.Tensor of shape (1, 3, H, W), normalized with ImageNet statistics.
    """
    pil_image = Image.open(img_path).convert('RGB')

    # Resize target: an explicit shape wins; otherwise cap the longer side.
    size = min(max(pil_image.size), max_size) if shape is None else shape

    preprocess = transforms.Compose([
        transforms.Resize(size),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225)),
    ])

    # Keep only the first three channels (guards against a stray alpha
    # channel) and add the batch dimension expected by the network.
    return preprocess(pil_image)[:3, :, :].unsqueeze(0)

select input image (content and style)¶

In [43]:
# load in content and style image
content = load_image('pp1.jpg').to(device)
# Resize style to match content
style = load_image('mona.jpg', shape=content.shape[-2:]).to(device)
In [44]:
# helper that un-normalizes a tensor image and converts it to a NumPy
# array suitable for matplotlib display
def im_convert(tensor):
    """Undo ImageNet normalization and return an (H, W, 3) NumPy image.

    Accepts a (1, 3, H, W) tensor (possibly on GPU, possibly requiring
    grad); values are clipped to [0, 1] for display.
    """
    array = tensor.to("cpu").clone().detach().numpy().squeeze()
    # channels-first -> channels-last for matplotlib
    array = array.transpose(1, 2, 0)
    # invert Normalize(mean, std): x * std + mean, per channel
    std = np.array((0.229, 0.224, 0.225))
    mean = np.array((0.485, 0.456, 0.406))
    return (array * std + mean).clip(0, 1)
In [45]:
# display the images for checking: content on the left, style on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
ax1.imshow(im_convert(content))
ax2.imshow(im_convert(style))
Out[45]:
<matplotlib.image.AxesImage at 0x7fa20a8546d0>
In [46]:
def get_features(image, model, layers=None):
    """Run `image` through `model` and collect activations at chosen layers.

    `layers` maps module names (string indices within the Sequential) to
    human-readable keys.  Defaults to the VGG-19 layers used by Gatys
    et al., with 'conv4_2' serving as the content representation.
    """
    if layers is None:
        layers = {'0': 'conv1_1',
                  '5': 'conv2_1',
                  '10': 'conv3_1',
                  '19': 'conv4_1',
                  '21': 'conv4_2',
                  '28': 'conv5_1'}

    captured = {}
    activation = image
    # Walk the sub-modules in order, recording outputs of requested layers.
    for module_name, module in model._modules.items():
        activation = module(activation)
        if module_name in layers:
            captured[layers[module_name]] = activation
    return captured
In [47]:
# reference: https://discuss.pytorch.org/t/implementation-of-gram-matrix-in-neural-style-tutorial/46803
def gram_matrix(tensor):
    """Return the Gram matrix (channel-by-channel feature correlations).

    NOTE(review): the batch dimension is flattened away by `view`, so this
    assumes batch size 1 -- which holds everywhere in this notebook.
    """
    _, depth, height, width = tensor.size()
    # one row per channel, each row a flattened feature map
    flat = tensor.view(depth, height * width)
    return torch.mm(flat, flat.t())

layer selection and target selection¶

In [48]:
# get content and style features once, before training (they stay fixed)
content_features = get_features(content, vgg)
style_features = get_features(style, vgg)

# calculate the gram matrices for each layer of our style representation
style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_features}

# create a third "target" image, initialized from the content image
# NOTE(review): calling .to(device) after requires_grad_() can yield a
# non-leaf tensor when the device actually changes; harmless here only
# because `content` is already on `device`.
target = content.clone().requires_grad_(True).to(device)
In [49]:
# compute the *total* (content + style) loss for the current target image
def total_Loss():
    """Content + style loss for the module-level `target` image.

    Reads the globals `target`, `vgg`, `content_features` and
    `style_grams`; the alpha/beta weighting follows Gatys et al.
    """
    # per-layer style weights (later layers weighted more heavily here)
    style_weights = {'conv1_1': 0.2,
                     'conv2_1': 0.2,
                     'conv3_1': 0.3,
                     'conv4_1': 0.4,
                     'conv5_1': 0.7}

    content_weight = 1  # alpha
    style_weight = 1e6  # beta

    # features of the image currently being optimized
    target_features = get_features(target, vgg)

    # content loss: MSE between target and content activations at conv4_2
    content_loss = torch.mean((target_features['conv4_2'] - content_features['conv4_2'])**2)

    # style loss: weighted, size-normalized Gram-matrix MSE over style layers
    style_loss = 0
    for layer_name, weight in style_weights.items():
        layer_feature = target_features[layer_name]
        _, d, h, w = layer_feature.shape
        gram_diff = gram_matrix(layer_feature) - style_grams[layer_name]
        # normalize by the layer size so deep/shallow layers are comparable
        style_loss += weight * torch.mean(gram_diff**2) / (d * h * w)

    return content_weight * content_loss + style_weight * style_loss

NST training¶

In [50]:
# for displaying the target image, intermittently
show_every = 400

# iteration hyperparameters: only the target pixels are optimized
optimizer = optim.Adam([target], lr=0.003)
steps = 4000

for i in range(1, steps+1):
    # recompute the loss from the current target pixels
    total_loss = total_Loss()
    # update target image
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()
    
    # display intermediate images and print the loss
    if  i % show_every == 0:
        print('Total loss: ', total_loss.item())
        plt.imshow(im_convert(target))
        plt.show()
Total loss:  1872171.0
Total loss:  795139.75
Total loss:  467441.4375
Total loss:  320485.25
Total loss:  241523.828125
Total loss:  190064.71875
Total loss:  152707.59375
Total loss:  121467.6875
Total loss:  94425.796875
Total loss:  72068.015625
In [16]:
# display content (left) and final target image (right) for comparison
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
ax1.imshow(im_convert(content))
ax2.imshow(im_convert(target))
Out[16]:
<matplotlib.image.AxesImage at 0x7fa20ba85ee0>
In [17]:
# save the final stylized image to disk, with the axes hidden
plt.imshow(im_convert(target))
plt.axis('off')
plt.savefig('result.png')

Part 2¶

feature map visualization¶

In [21]:
# NOTE(review): star import pulls get_features_forVGG, myModel, nn and
# plot_feature_map into the namespace; explicit imports would be clearer.
from feature_map_extraction import *
In [22]:
# select image to do feature map visualization
# NOTE(review): this overwrites the `content` tensor from part 1 and stays
# on the CPU, matching the CPU copy of vgg_19 built in the next cell.
content = load_image('mona.jpg')
In [23]:
# Build a fresh CPU copy of VGG-19 and wrap its conv stack with the helpers
# imported from feature_map_extraction above.
vgg_19 = models.vgg19(pretrained=True)
l = get_features_forVGG(vgg_19)
conv_model = nn.Sequential(*l)
# layer selection followed by VGG-19 layout (module indices to capture)
layer_selection = ['2','5','8','11','15']
myNet = myModel(conv_model, layer_selection)
output = myNet(content)
In [24]:
# index into `layer_selection`, i.e. which captured layer to visualize
index = 0 # see '2' layer in VGG-19
plot_feature_map(output, index)
In [25]:
# index into `layer_selection`, i.e. which captured layer to visualize
index = 4 # see '15' layer in VGG-19
plot_feature_map(output, index)

Part 3¶

try on different hyperparameters¶

In [26]:
def train_NST_with_parameter_set(target, layers):
    """Optimize `target` with Adam and record intermediate losses/images.

    Parameters
    ----------
    target : torch.Tensor
        The (1, 3, H, W) image tensor being optimized (requires_grad=True).
    layers : dict
        Mapping from VGG module index (str) to layer key, passed to
        get_features; must yield 'conv4_2' and every key in style_weights.

    Returns
    -------
    (steps_list, loss_list, image_list), sampled every `show_every`
    iterations plus iteration 1.

    NOTE(review): depends on module-level globals set by the "parameter"
    cells below -- style_weights, content_weight, style_weight,
    content_features, style_grams, and vgg.  Run those cells first.
    """
    steps_list = []
    loss_list = []
    image_list = []
    
    # for displaying the target image, intermittently
    show_every = 500

    # iteration hyperparameters
    optimizer = optim.Adam([target], lr=0.02)
    steps = 4500  # decide how many iterations to update your image (5000)

    for ii in range(1, steps+1):

        # get the features from your target image
        target_features = get_features(target, vgg, layers)

        # the content loss
        content_loss = torch.mean((target_features['conv4_2'] - content_features['conv4_2'])**2)

        # the style loss
        # initialize the style loss to 0
        style_loss = 0
        # then add to it for each layer's gram matrix loss
        for layer in style_weights:
            # get the "target" style representation for the layer
            target_feature = target_features[layer]
            target_gram = gram_matrix(target_feature)
            _, d, h, w = target_feature.shape
            # get the "style" style representation
            style_gram = style_grams[layer]
            # the style loss for one layer, weighted appropriately
            layer_style_loss = style_weights[layer] * torch.mean((target_gram - style_gram)**2)
            # add to the style loss, normalized by the layer size
            style_loss += layer_style_loss / (d * h * w)

        # calculate the *total* loss

        # var loss (total-variation smoothing; currently disabled)
        #var_loss = variation_loss(target)

        total_loss = content_weight * content_loss + style_weight * style_loss

        # update your target image
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # display intermediate images and print the loss
        if  ii % show_every == 0 or ii == 1:
            print('Total loss: ', total_loss.item())
            plt.imshow(im_convert(target))
            steps_list.append(ii)
            loss_list.append(total_loss.item())
            image_list.append(im_convert(target))
            plt.show()
    return steps_list, loss_list, image_list
In [27]:
import matplotlib.pyplot as plt
def plot_sequential_fig(list_image):
    """Show a grid of images: one row per run, one column per snapshot.

    `list_image` is a list of rows, each row a list of (H, W, 3) arrays.
    """
    n_rows = len(list_image)
    n_cols = len(list_image[0])
    fig = plt.figure(figsize=(20, 6))
    # Flatten row-major and fill subplot slots 1..n_rows*n_cols in order.
    flat_images = (img for row in list_image for img in row)
    for slot, img in enumerate(flat_images, start=1):
        fig.add_subplot(n_rows, n_cols, slot)
        plt.axis('off')
        plt.imshow(img)

    # remove gaps between panels
    fig.subplots_adjust(wspace=0.0, hspace=0.0)
In [28]:
import matplotlib.pyplot as plt
def plot_loss_curve(list_steps, list_losses):
    """Overlay one loss curve per hyperparameter set on the current figure.

    `list_steps[i]` and `list_losses[i]` are the x/y series for set i+1.
    """
    for set_idx in range(len(list_steps)):
        curve_label = "hyperparameter set {}".format(set_idx + 1)
        plt.plot(list_steps[set_idx], list_losses[set_idx], label=curve_label)
    plt.legend()
    plt.xlabel('steps')
    plt.ylabel('log(loss)')
In [40]:
def take_log(a):
    """Return the element-wise natural logarithm of `a` as a Python list."""
    return [np.log(value) for value in a]

different layer selection¶

In [29]:
# Default selection: vgg.features module indices mapped to the layer names
# used by the loss code (matches Gatys et al.).
default_layers = {'0': 'conv1_1',
                  '5': 'conv2_1', 
                  '10': 'conv3_1', 
                  '19': 'conv4_1',
                  '21': 'conv4_2',  ## content representation
                  '28': 'conv5_1'}

# Alternative selection for the ablation below.
# NOTE(review): the values here are reused as keys so the same style_weights
# lookup still works; they are NOT the true names of these module indices
# in VGG-19 (e.g. index '25' is not literally conv4_2).
different_layers = {'2': 'conv1_1',
                  '7': 'conv2_1', 
                  '12': 'conv3_1', 
                  '16': 'conv4_1',
                  '25': 'conv4_2',  ## content representation
                  '34': 'conv5_1'}
In [31]:
# load in content and style image again (part 2 replaced `content` with a
# CPU tensor; these copies live on `device` for training)
content = load_image('pp1.jpg').to(device)
# Resize style to match content
style = load_image('mona.jpg', shape=content.shape[-2:]).to(device)
In [32]:
# parameter 1: default layer selection, target initialized from content
content_features = get_features(content, vgg)
style_features = get_features(style, vgg)
style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_features}
target = content.clone().requires_grad_(True).to(device)
#target = torch.rand(1,3,400,533, requires_grad=True, device="cuda")

# globals read by train_NST_with_parameter_set
style_weights = {'conv1_1': 1.,
                 'conv2_1': 0.75,
                 'conv3_1': 0.2,
                 'conv4_1': 0.2,
                 'conv5_1': 0.2}

content_weight = 1  # alpha
style_weight = 1e6  # beta
In [33]:
# run hyperparameter set 1 -> (steps, losses, snapshot images)
a_1, b_1, c_1 = train_NST_with_parameter_set(target, default_layers)
Total loss:  65916780.0
Total loss:  727230.875
Total loss:  262740.6875
Total loss:  85118.703125
Total loss:  24611.595703125
Total loss:  8928.8359375
Total loss:  4387.43212890625
Total loss:  2734.06689453125
Total loss:  3994.117431640625
Total loss:  2060.72998046875
In [34]:
# parameter 2: alternative layer selection, target initialized from content
content_features = get_features(content, vgg, different_layers)
style_features = get_features(style, vgg, different_layers)
style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_features}
target = content.clone().requires_grad_(True).to(device)
#target = torch.rand(1,3,400,533, requires_grad=True, device="cuda")

# globals read by train_NST_with_parameter_set
style_weights = {'conv1_1': 1.,
                 'conv2_1': 0.75,
                 'conv3_1': 0.2,
                 'conv4_1': 0.2,
                 'conv5_1': 0.2}

content_weight = 1  # alpha
style_weight = 1e6  # beta
In [35]:
# run hyperparameter set 2 -> (steps, losses, snapshot images)
a_2, b_2, c_2 = train_NST_with_parameter_set(target, different_layers)
Total loss:  445096096.0
Total loss:  3018519.0
Total loss:  830121.5625
Total loss:  291254.9375
Total loss:  137533.203125
Total loss:  75332.703125
Total loss:  46018.44140625
Total loss:  30223.634765625
Total loss:  22594.34765625
Total loss:  23292.99609375
In [36]:
# parameter 3: default layers, target initialized from random noise, and
# style_weight lowered to 1 so content dominates
content_features = get_features(content, vgg, default_layers)
style_features = get_features(style, vgg, default_layers)
style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_features}
#target = content.clone().requires_grad_(True).to(device)
# Random-noise start.  Use the shared `device` variable instead of the
# hardcoded "cuda" string so this cell also runs on a CPU-only machine.
# NOTE(review): (400, 533) must equal content.shape[-2:] or the content
# loss will fail to broadcast -- confirm when using other input images.
target = torch.rand(1, 3, 400, 533, requires_grad=True, device=device)

# globals read by train_NST_with_parameter_set
style_weights = {'conv1_1': 1.,
                 'conv2_1': 0.75,
                 'conv3_1': 0.2,
                 'conv4_1': 0.2,
                 'conv5_1': 0.2}

content_weight = 1  # alpha
style_weight = 1  # beta
In [37]:
# run hyperparameter set 3 -> (steps, losses, snapshot images)
a_3, b_3, c_3 = train_NST_with_parameter_set(target, default_layers)
Total loss:  73.50526428222656
Total loss:  1.4252307415008545
Total loss:  0.41589152812957764
Total loss:  0.24418824911117554
Total loss:  0.1967148780822754
Total loss:  0.1740114837884903
Total loss:  0.1614297777414322
Total loss:  0.15213488042354584
Total loss:  0.15001021325588226
Total loss:  0.13758087158203125
In [38]:
# parameter 4: alternative layers, target initialized from random noise,
# style_weight lowered to 1 so content dominates
content_features = get_features(content, vgg, different_layers)
style_features = get_features(style, vgg, different_layers)
style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_features}
#target = content.clone().requires_grad_(True).to(device)
# Random-noise start.  Use the shared `device` variable instead of the
# hardcoded "cuda" string so this cell also runs on a CPU-only machine.
# NOTE(review): (400, 533) must equal content.shape[-2:] or the content
# loss will fail to broadcast -- confirm when using other input images.
target = torch.rand(1, 3, 400, 533, requires_grad=True, device=device)

# globals read by train_NST_with_parameter_set
style_weights = {'conv1_1': 1.,
                 'conv2_1': 0.75,
                 'conv3_1': 0.2,
                 'conv4_1': 0.2,
                 'conv5_1': 0.2}

content_weight = 1  # alpha
style_weight = 1  # beta
In [39]:
# run hyperparameter set 4 -> (steps, losses, snapshot images)
a_4, b_4, c_4 = train_NST_with_parameter_set(target, different_layers)
Total loss:  644.386474609375
Total loss:  6.975801944732666
Total loss:  2.344684600830078
Total loss:  0.9244899153709412
Total loss:  0.48602306842803955
Total loss:  0.3218299150466919
Total loss:  0.24496090412139893
Total loss:  0.2009422332048416
Total loss:  0.17362934350967407
Total loss:  0.15529181063175201
In [41]:
# loss curve plot: compare all four hyperparameter sets on a log scale
list_steps = [a_1, a_2, a_3, a_4]
list_losses = [take_log(b_1), take_log(b_2), take_log(b_3), take_log(b_4)]
plot_loss_curve(list_steps, list_losses)
In [42]:
# sequential output images: one row of snapshots per hyperparameter set
list_image = [c_1, c_2, c_3, c_4]
plot_sequential_fig(list_image)
In [ ]: